/******************************************************************************
	Date: July 2022 
	
	Program: 
	- Clean BRN data
	- Harmonize sector codes
	
	Input:
	- brn data in $brnpath

	Output:
	- $commonpath/brn_wof_$firstyearbrn$lastyearbrn
	
*****************************************************************************/

* Import BRN txt file
log using $brnpath/import_brn$firstyearbrn$lastyearbrn.txt, comma replace
clear
di "1993"
global year 1993
insheet using $brnpath/brn1993.csv, names
do $dopath/labelbrn
sum va product tot_sales exp_sales dom_sales
*tab ape 
g year=1993
save $brnpath/brn_$firstyearbrn$lastyearbrn, replace
forvalues i=1994(1)$lastyearbrn{
	global year `i'
	clear
	di "`i'"
	insheet using $brnpath/brn`i'.csv, names
	do $dopath/labelbrn
	sum va product tot_sales exp_sales dom_sales
	*tab ape
	g year=`i'
	if `i'>=2000{
		tostring datcs, replace
	}
	append using $brnpath/brn_$firstyearbrn$lastyearbrn
	save $brnpath/brn_$firstyearbrn$lastyearbrn, replace
}

#delimit ;
foreach i in income tot_cost op_cost fin_cost extra_cost participation 
income_tax tot_revenue op_revenue fin_revenue extra_revenue op_income 
fin_income before_tax_income extra_income merch_purchases delta_inventories_merch
raw_purchases delta_inventories_raw other_ext_cost tax wages social_cont 
tot_sales tot_sales_merch tot_sales_output tot_sales_services exp_sales 
exp_sales_merch exp_sales_output exp_sales_services dom_sales dom_sales_merch 
dom_sales_output dom_sales_services delta_inventories_output fin_cost_prov 
fin_cost_interest fin_cost_ermvts fin_cost_market fin_revenue_inv 
fin_revenue_otherinv fin_revenue_other fin_revenue_prov_reversal 
fin_revenue_ermvts fin_revenue_market fixed_assets circulating_assets 
tot_assets liabilities_equity liabilities_debt liabilities_tot 
operating_intangible_assets patent_intangible_assets goodwill_intangible_assets 
other_intangible_assets advance_intangible_assets lands_tangible_assets 
buildings_tangible_assets equipment_tangible_assets other_tangible_assets 
receivables_fin_assets other_holdings_fin_assets loans_fin_assets 
other_fin_assets inventories_raw inventories_goods inventories_serv 
inventories_inputs inventories_merch net_income convertible_bonds 
other_bonds bank_loans other_loans debt_suppliers debt_tax debt_bank 
other_assets rd_intangible_assets liability_other equity_social_capital 
empdett equity_other product prodven va vabcf ebe caf owner_equity 
own_resources stable_debt stable_resources debt total_debt working_capital 
need_working_capital gross_fixed_assets capital_asset
{;
	replace `i'=`i'/6.55957 if year<2001;
};
#delimit cr
save $brnpath/brn_$firstyearbrn$lastyearbrn, replace
save $commonpath/brn_$firstyearbrn$lastyearbrn, replace

use $commonpath/brn_$firstyearbrn$lastyearbrn, clear
bys year: egen vatot=sum(va)
tabulate year, summarize(vatot)
use $commonpath/brn_$firstyearbrn$lastyearbrn, clear
drop if substr(ape_naf93,1,2)=="01"|substr(ape_naf93,1,2)=="65"|substr(ape_naf93,1,2)=="66"|substr(ape_naf93,1,2)=="67"|substr(ape_naf93,1,2)=="75"
drop if (substr(ape_rev1,1,2)=="01"|substr(ape_rev1,1,2)=="65"|substr(ape_rev1,1,2)=="66"|substr(ape_rev1,1,2)=="67"|substr(ape_rev1,1,2)=="75") & year==2002
drop if (substr(ape_rev1,1,2)=="01"|substr(ape_rev1,1,2)=="65"|substr(ape_rev1,1,2)=="66"|substr(ape_naf93,1,2)=="67"|substr(ape_rev1,1,2)=="75") & year>2002
save $commonpath/brn_wof_$firstyearbrn$lastyearbrn, replace
erase $brnpath/brn_$firstyearbrn$lastyearbrn.dta


* Preliminary part : Basic Cleaning of $commonpath/brn_$firstyearbrn$lastyearbrn, i.e. remove finance
use $commonpath/brn_$firstyearbrn$lastyearbrn, clear
drop if substr(ape_naf93,1,2)=="65"|substr(ape_naf93,1,2)=="66"|substr(ape_naf93,1,2)=="67"|substr(ape_naf93,1,2)=="75"
drop if (substr(ape_rev2,1,2)=="64"|substr(ape_rev2,1,2)=="65"|substr(ape_rev2,1,2)=="66"|substr(ape_rev2,1,2)=="84") 
save $commonpath/brn_wof_$firstyearbrn$lastyearbrn, replace


* Part 1 : Cleaning of the database 
use $commonpath/brn_wof_$firstyearbrn$lastyearbrn, clear
gsort siren year
* Clean wrong siren
drop if (siren == ""|siren == "."|siren == "0"|siren == "000000000")

* Hand sanity checks for top firms
gsort siren year
bys siren: gegen vabcf_mean=mean(vabcf)
bys siren: g first=1 if _n==1
gsort -vabcf_mean
g _=sum(first)
bys siren: gegen rank_vabcfmean=max(_)
drop _ first
tab siren if rank_vabcfmean<11
tab siren rank_vabcfmean if rank_vabcfmean<11

* SECRET DATA 01 HERE 
* do $dopath/SECRET_DATA_01.do

* harmonize naf codes over time
use $commonpath/brn_wof_$firstyearbrn$lastyearbrn, clear
sort siren year
drop if ape_rev2==""
drop if length(ape_rev2)==4
by siren: keep if _n==1
keep siren ape_rev2
rename ape_rev2 ape
sort siren
save $commonpath/temp, replace

use $commonpath/brn_wof_$firstyearbrn$lastyearbrn, clear
sort siren
merge m:1 siren using $commonpath/temp
drop _merge
save $commonpath/brn_wof_$firstyearbrn$lastyearbrn, replace

sort siren year
replace ape_rev2=ape_naf93 if length(ape_naf93)==5
replace ape_naf93="" if length(ape_naf93)==5
drop if ape_naf93=="" & length(ape_rev2)!=4
replace ape_naf93=ape_rev2 if ape_naf93=="" & length(ape_rev2)==4
sort siren year
by siren: keep if _n==1
keep siren ape_naf93
sort siren
merge 1:1 siren using $commonpath/temp
keep if _merge==3
g n=1
collapse (count) count_firms=n, by(ape_naf93 ape)
bys ape_naf93: egen tot=sum(count_firms)
g share=count_firms/tot
bys ape_naf93: egen max=max(share)
g max_ = 1 if max==share
bys ape_naf93: egen countmax=sum(max_)
keep if max_==1 & countmax==1
keep ape ape_naf93
rename ape ape2
sort ape_naf93
save $commonpath/temp, replace

use $commonpath/brn_wof_$firstyearbrn$lastyearbrn, clear
replace ape_naf93="" if length(ape_naf93)==5
sort ape_naf93
merge m:1 ape_naf93 using $commonpath/temp
replace ape=ape2 if _merge==3 & ape==""
drop ape2 _merge
save $commonpath/brn_wof_$firstyearbrn$lastyearbrn, replace

sort siren year
keep if ape!=""
by siren: keep if _n==1
rename ape ape2
keep siren ape2
sort siren
save $commonpath/temp, replace

use $commonpath/brn_wof_$firstyearbrn$lastyearbrn, clear
sort siren
merge m:1 siren using $commonpath/temp
replace ape=ape2 if ape=="" & _merge==3
drop _merge ape2
save $commonpath/brn_wof_$firstyearbrn$lastyearbrn, replace
